import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set_style("whitegrid")
# Load the raw titles table from the local CSV export.
data = pd.read_csv("/Users/linhvu/Desktop/Meta Data Challenge/netflix_titles.csv")

# Fill the descriptive columns with a placeholder *by assignment*.
# ``data[col].replace(..., inplace=True)`` mutates an intermediate Series:
# pandas deprecates that pattern, and under Copy-on-Write it no longer
# updates ``data`` -- the ``dropna`` below would then silently discard every
# row that lacks a country, cast or director.
placeholder_cols = ['country', 'cast', 'director']
data[placeholder_cols] = data[placeholder_cols].fillna('No Data')

# Any row that is still missing a value in one of the other columns is dropped.
data.dropna(inplace=True)

# Parse the date the title was added and derive calendar features from it.
# Surrounding whitespace is stripped first so that every value presents the
# same textual format to the parser (pandas >= 2.0 infers one format and
# rejects values that do not match it).
data['date_added'] = pd.to_datetime(data['date_added'].str.strip())
data['month_added'] = data['date_added'].dt.month
data['month_name_added'] = data['date_added'].dt.month_name()
data['year_added'] = data['date_added'].dt.year


def _split_countries(raw):
    """Split a comma-separated country string into a list of clean names.

    Each name is stripped of surrounding whitespace and empty tokens (from
    leading, trailing or doubled commas) are discarded.  If nothing is left,
    the ``'No Data'`` placeholder is returned so the row keeps one entry.
    """
    names = [name.strip() for name in raw.split(',')]
    return [name for name in names if name] or ['No Data']


# Turn the comma-separated country string into a list of country names.
# No ``dropna`` is needed here: missing countries were filled above.
data['country'] = data['country'].apply(_split_countries)
# Column of ``data`` that holds a list of values per row.
lst_col = 'country'

# Build a long-format copy of ``data`` with one row per (title, country)
# pair: every other column is repeated for each entry of the list.
# ``DataFrame.explode`` replaces the manual ``np.repeat`` / ``np.concatenate``
# expansion, which raised ``ValueError`` when ``data`` had no rows.
# The index is reset so the result has a fresh 0..n-1 index, and the columns
# are selected explicitly to keep the original column order.
data2 = data.explode(lst_col).reset_index(drop=True)[data.columns.tolist()]
import plotly.express as px
# Count how many rows of ``data2`` fall into each (year_added, country)
# pair; the tally is stored in a column named 'counts'.
per_year = data2.groupby('year_added')
country_tally = per_year['country'].value_counts()
year_country2 = country_tally.reset_index(name='counts')

# Animated world map: one frame per ``year_added`` value, countries matched
# by name and shaded by their count on a fixed 0-200 colour range.
fig = px.choropleth(
    year_country2,
    locations="country",
    locationmode='country names',
    color="counts",
    range_color=[0, 200],
    color_continuous_scale=px.colors.sequential.OrRd,
    animation_frame='year_added',
)
fig.update_layout(title='Comparison by country')
fig.show()